* 01_cr_ivs is called by main to create df of all IVs.
* sources of data are documented below. 
*-------------------------------------------------------------------------------
* BAAS committees data 
* --------------------
	/*
	Source:
 	MacLeod, R, and Peter Jeffrey Collins. 1981. The Parliament of Science : 
	The British Association for the Advancement of Science, 1831-1981. 
	Northwood: Science Reviews.
	
	entered by hand
	*/
*-------------------------------------------------------------------------------
	
	* Read the hand-entered BAAS workbook (first row = variable names),
	* discard the attendance column, and park the result as a .dta file
	* that the annual-series merge picks up later in this script.
	import excel using "${IV}baas.xlsx", firstrow
	drop attendance
	save "${IV}baas.dta", replace

	* Empty memory before the next source is read
	clear

		
		
		
*-------------------------------------------------------------------------------
* RSS Fellows data 
* ----------------
	/*
	Source:
	Rosenbaum, S. 1984. “The Growth of the Royal Statistical Society.” 
	Journal of the Royal Statistical Society. Series A (General) 147 (2). 
	[Royal Statistical Society, Wiley]: 375–88. doi:10.2307/2981692.
	
	entered by hand
	*/
*-------------------------------------------------------------------------------
	
	* Read sheet "Tabelle1" of the hand-entered RSS fellows workbook
	* (first row = variable names) and park it as a .dta file for the
	* annual-series merge later in this script.
	import excel using "${IV}fellows.xlsx", firstrow sheet("Tabelle1")
	save "${IV}fellows.dta", replace

	* Empty memory before the next source is read
	clear
		

		
		
*-------------------------------------------------------------------------------
* Journals data 
* -------------
	/*
	Source:
	OCLC WorldCat, “the world’s largest network of library content” 
	containing collections of more than 10,000 libraries worldwide. 
	
	see `cr_19c_journals' in script folder for code that creates this df
	*/
*-------------------------------------------------------------------------------
	
	* Journals series is left in memory: it is the master for the merges below
	import delimited using "${IV}19c_journals.csv"
		
	
	
	
*-------------------------------------------------------------------------------
* Merge annual time series 
* ------------------------
	* Merge BAAS & Fellows with Journals series, 
	* save as temp for later re-integration
*-------------------------------------------------------------------------------
	
	* The journals series in memory is the master; attach BAAS first, then
	* Fellows, matching one row per year and keeping no _merge marker.
	foreach src in baas fellows {
		merge 1:1 year using "${IV}`src'.dta", nogenerate
	}

	* Park the combined yearly series for re-integration further down
	save "${IV}temp_series.dta", replace

	* The per-source intermediates are no longer needed
	foreach src in baas fellows {
		erase "${IV}`src'.dta"
	}
		
	
	
	
*-------------------------------------------------------------------------------
* Varieties of Democracy Data V10 AND 
* International Congress Data
* -------------------------------
	/* 
	Sources:
	https://www.v-dem.net/dsarchive.html
	
	see `cr_west_vdem10' in script folder for code that creates this df
	
	
	Union of International Associations (UIA). 1960. _Les congrès 
	internationaux, liste complète. International congresses, full list_.
	https://worldcat.org/title/23419582
	
	see `cr_congresses' in script folder for code that creates this df
	*/
*-------------------------------------------------------------------------------

	
	* Load the excerpted V-Dem 10 file and discard its index column

		use "${IV}west_vdem10.dta"
		drop index



	* Rename variables

		* strip the V-Dem prefixes; v2x_ must go before the shorter v2 stub
		foreach stub in v2x_ v2 v3 e_ {
			rename `stub'* *
		}

		* general country characteristics
		rename	(suffr pelifeex peprisch svindep) ///
				(suffrage lifeex sch_enrll indpndnt)

		* state symbols and institutions
		rename	(stcitlaw stflag stnatant stnatbank) ///
				(ctznshp flag anthem bank)

		* statistical infrastructure and universities
		rename	(stcensus ststatag ststybcov ststybpub canuni) ///
				(census agency yrbkcov yrbkpub n_unis)

		* conflict measures
		rename	(miinteco miinterc) ///
				(int_cnflct dmst_cnflct)

		* country identifier
		rename	country_name state

			
			
	* Running count of censuses taken so far, within state in year order

		cap drop census_all
		bysort state (year): gen census_all = sum(census)



	* Cumulative flag for "a census has been taken at some point"
	* (0 before the first census, 1 from then on, assuming census is a
	* 0/1 indicator)

		cap drop census_ever
		cap drop census_ever_all

		* mark the year in which the running count first equals 1
		tempvar first_census
		bysort state (year): gen `first_census' = ///
						(census_all == 1) & (census_all[_n - 1] != 1)

		* carry that mark forward through all later years
		bysort state (year): gen census_ever = sum(`first_census')



	* The helper and the raw census variable are no longer needed;
	* census_all is kept

		drop `first_census' census
		
		
		
	* Attach the country-year congress dataset (one row per state-year)

		merge 1:1 state year using "${IV}19c_congresses_cy.dta", nogenerate



	* Set the UK rows aside in their own file for later re-integration:
	* every variable gets a uk_ prefix except year, which stays the key

		preserve
			drop if state != "United Kingdom"
			drop state
			rename * uk_*
			rename uk_year year
			save "${IV}uk_vdem10.dta", replace
		restore


		
	* Compute west-wide institutional characteristics, sans UK

			* UK rows were saved separately above, so remove them here
			drop if state == "United Kingdom"

			* Variables that are averaged within year
			global	world	suffrage ///
							indpndnt ///
							sch_enrll ///
							ctznshp ///
							flag ///
							anthem ///
							bank ///
							agency ///
							yrbkcov ///
							yrbkpub ///
							lifeex ///
							civil_war ///
							int_cnflct ///
							dmst_cnflct ///
							census_ever



			* Yearly means (proportions, for 0/1 variables)

				foreach v of global world {
					cap drop wst_`v'
					egen wst_`v' = mean(`v'), by(year)
				}


			* Universities and congresses are counts, so they are
			* totalled within year rather than averaged

				global sum_vars		n_unis ///
									n_confs_ttl ///
									n_confs_sci_ref ///
									n_confs_sci

				foreach v of global sum_vars {
					cap drop wst_`v'
					egen wst_`v' = total(`v'), by(year)
				}

			* Number of rows with a non-missing state, per year

				cap drop wst_n_states
				egen wst_n_states = count(state), by(year)


			* Collapse to one row per year: every kept variable is constant
			* within year, so any single row per year will do; rows with a
			* missing year are discarded

				keep year wst*
				bysort year: keep if _n == 1 & !missing(year)

			
			
	* Bring the UK variables and the annual journal/BAAS/fellows series
	* back in, one row per year, keeping no _merge marker

		foreach piece in uk_vdem10 temp_series {
			merge 1:1 year using "${IV}`piece'.dta", nogenerate
		}

		* both intermediates have served their purpose
		foreach piece in uk_vdem10 temp_series {
			erase "${IV}`piece'.dta"
		}
	
	
	
	* Impute school enrollment vdem data

		* Enrollment rates for 1803-1819 are extrapolated linearly: a trend
		* is fitted on the years selected by `year > 1819 & year < 1830' and
		* projected backwards, overwriting whatever is stored for 1803-1819.
		* Done once for the UK series and once for the west-wide series.
		* NOTE(review): `year < 1830' excludes 1830 itself; if the intended
		* fitting window is 1820-1830 inclusive this should be `<= 1830' --
		* confirm.

			foreach area in uk wst {

				* start from the observed series
				cap drop	imp_`area'_sch_enrll
				gen			imp_`area'_sch_enrll = `area'_sch_enrll

				* linear trend over the fitting window
				reg `area'_sch_enrll year if year > 1819 & year < 1830

				* fitted value for each pre-1820 year
				forv yr = 1803(1)1819 {
					replace imp_`area'_sch_enrll = `yr' * _b[year] ///
													 + _b[_cons] ///
													 if year == `yr'
				}
			}
		
	

	* Create measure of system-wide scientization
	* The index combines west-wide (wst_), UK (uk_) and annual-series
	* indicators into one factor score, which is then standardized.
				
			* Indicator list; kept in a global so the same list feeds both
			* the reliability check and the factor model below
			global scientization		wst_census_ever /// 
										wst_agency /// 
										wst_yrbkpub /// 
										uk_yrbkpub ///
										uk_census_all ///
										stats_journals /// 
										soc_journals /// 
										fellows /// 
										uk_n_confs_sci wst_n_confs_sci /// 
										uk_agency ///
										committees ///  
										wst_n_unis 
					

					
			* Reliability analysis
			* alpha: scale reliability on standardized items with item-level
			* statistics; factor: iterated principal-factor model. The factor
			* results are what predict uses below, so factor must stay last.
			
				alpha $scientization, std item 
				factor $scientization, ipf
			
			
			* Standardized index
			* NOTE(review): this step was previously described as
			* "standardized & lagged", but no lag is applied here -- confirm
			* whether lagging happens elsewhere.
			
				* predict with a single new variable stores one factor score
				* (presumably the first factor -- confirm)
				cap drop scientization		
				predict scientization  
				
				* replace the raw score by its standardized version under the
				* same name
				egen std_sci = std(scientization)
				drop scientization
				rename std_sci scientization		
			
			
			
			
*-------------------------------------------------------------------------------
* INDUSTRIALIZATION SERIES
* ------------------------
	/*
	Sources:
	Broadberry, S., Campbell, B. M. S., Klein, A., Overton, M., & van Leeuwen, 
	B. (2015). British Economic Growth, 1270–1870. Cambridge University Press. 
	https://doi.org/10.1017/CBO9781107707603
	
	National income, output and expenditure of the United Kingdom 1855-1965 by 
	Charles Feinstein, published by Cambridge University Press, 1972 
	
	csvs below extracted from xls files included in ivs folder

	*/
*-------------------------------------------------------------------------------


	* Broadberry's agricultural & industrial output series, 1270-1870
	* Loaded into its own frame so the dataset in `default' is untouched.
	* A frame left over from an interrupted run is dropped first, because
	* `frame create' errors out when the name is already taken.
	
		cap frame drop b_prod
		frame create b_prod
		frame change b_prod
		import delimited "${IV}uk_prod_broadberry.csv"
		rename ag b_ag
		rename ind b_ind


		
	* Feinstein's agricultural & industrial output series, 1855-1965
	* Same pattern; the frame is also saved to disk because the merge
	* below needs a file as its using dataset.
		
		cap frame drop f_prod
		frame create f_prod
		frame change f_prod
		import delimited "${IV}uk_prod_feinstein.csv"
		rename ag f_ag
		rename ind f_ind 
		save "${IV}temp_f_prod.dta", replace


		
	* Merge Feinstein's later estimates into the Broadberry frame
	* (one row per year; no _merge marker is kept)
	
		frame change b_prod
		merge 1:1 year using "${IV}temp_f_prod.dta", nogenerate

		* the on-disk copy was only needed as the using file for this merge
		erase "${IV}temp_f_prod.dta"

		
		
	* Broadberry's and Feinstein's estimates are indexed differently:
	* 1700 = 100 and 1914 = 100, respectively.
	* NOTE(review): base years as previously noted, with the order put
	* right (Feinstein's series only starts in 1855) -- confirm against
	* the xls files.
	* To express Feinstein's series in Broadberry's terms, take the ratio
	* B / F in every year where both are observed and store its mean as
	* the link factor (globals link_ag and link_ind).
	
		foreach s in ag ind {

			* year-by-year ratio; missing wherever either series is missing
			gen f2b_`s' = b_`s' / f_`s'

			* mean ratio over the overlapping years
			quietly su f2b_`s'
			global link_`s' = `r(mean)'
		}

			
			
	* Convert Feinstein's post-1870 series in terms of Broadberry's
	* using the link factor, then splice the two into one series:
	*   fqb_*  F qua (in terms of) B, defined for year > 1870 only
	*   fb_*   combined measure: B's values, with F-qua-B from 1871 on
	
		foreach s in ag ind {

			* rescale Feinstein
			gen fqb_`s' = f_`s'*${link_`s'} if year > 1870

			* initialize the combined measure with Broadberry's values
			gen fb_`s' = b_`s'

			* fill in with Feinstein's, in terms of Broadberry
			replace fb_`s' = fqb_`s' if year > 1870
		}


	
	* Clean up the b_prod frame
		
		* Discard everything before 1800
		keep if year >= 1800
		
		* Keep the key, both source series, and the spliced series;
		* the ratio and rescaled helpers go
		keep year b_ag f_ag fb_ag b_ind f_ind fb_ind
		
		
	frame change default 
		
	
	* Pull the industrialization variables into the main dataset:
	* link the frames on year, then copy the variables across
	* NOTE(review): frlink leaves a link variable (presumably named
	* b_prod) in the main dataset -- confirm whether it should be dropped.
	
		frlink 1:1 year, frame(b_prod)
		frget *, from(b_prod)

		
			
					
*-------------------------------------------------------------------------------
* PARLIAMENTARY DEBATE DATA
* --------------------------------
	/*
	Source:
	These data contain LDA topic loadings for each speech in the UK parliament. 
	The original data can be found here: 
	https://www.hansard-archive.parliament.uk/
	Code and analyses for the LDA available upon request.
	*/
*-------------------------------------------------------------------------------
	
	* Read the yearly topic loadings into a scratch frame so the main
	* dataset in `default' stays in memory. A frame left over from an
	* interrupted run is dropped first, because `frame create' errors out
	* when the name is already taken.
	cap frame drop talk
	frame create talk 
	frame change talk 
	
	import delimited "${IV}yr_topic106.csv", bindquote(strict)
	
	
	* save and re-import
	
		* `replace' so a file left behind by an interrupted run does not
		* abort the save
		save "yr_t106.dta", replace
		frame change default

		* remove a stale merge marker if one exists (previously written as
		* `drop drop _merge', which always failed silently)
		cap drop _merge
		merge m:1 year using "yr_t106.dta"
		drop _merge
		erase "yr_t106.dta"


		
		
*-------------------------------------------------------------------------------
* Clean up
*-------------------------------------------------------------------------------

		* Remove intermediate files; `cap' because they may already have
		* been erased earlier in the script
		cap erase "${IV}uk_vdem10.dta"
		cap erase "${IV}temp_series.dta"
		cap erase "${IV}temp_f_prod.dta"

		* Drop the scratch frames created for the industrialization and
		* parliamentary-debate sections
		frame drop b_prod
		frame drop f_prod
		frame drop talk
